library(synapser)
New synapser version detected:
You are using synapser version 0.11.7.
synapser version 1.0.59 is detected at http://ran.synapse.org.
To upgrade to the latest version of synapser, please run the following command:
install.packages("synapser", repos="http://ran.synapse.org")
TERMS OF USE NOTICE:
When using Synapse, remember that the terms and conditions of use require that you:
1) Attribute data contributors when discussing these data or results from these data.
2) Not discriminate, identify, or recontact individuals or groups represented by the data.
3) Use and contribute only data de-identified to HIPAA standards.
4) Redistribute data only under these same terms of use.
# library(paxtoolsr)
# library(org.Hs.eg.db)
# library(clusterProfiler)
# library(HotNetvieweR)
library(igraph)
Attaching package: ‘igraph’
The following objects are masked from ‘package:stats’:
decompose, spectrum
The following object is masked from ‘package:base’:
union
library(tidygraph)
Attaching package: ‘tidygraph’
The following object is masked from ‘package:igraph’:
groups
The following object is masked from ‘package:stats’:
filter
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.3 ✓ purrr 0.3.4
✓ tibble 3.0.6 ✓ dplyr 1.0.4
✓ tidyr 1.1.2 ✓ stringr 1.4.0
✓ readr 1.4.0 ✓ forcats 0.5.1
── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
x purrr::compose() masks igraph::compose()
x tidyr::crossing() masks igraph::crossing()
x dplyr::filter() masks tidygraph::filter(), stats::filter()
x dplyr::groups() masks tidygraph::groups(), igraph::groups()
x dplyr::lag() masks stats::lag()
x purrr::simplify() masks igraph::simplify()
# source(paste0(here::here(),'/../biodom_tally.R'))
# source(paste0(here::here(),'/../biodom_enr.R'))
# source(paste0(here::here(),'/../biodom_enr_plots.R'))
theme_set(theme_bw())
Grab relevant data from synapse, including:
1) Target Risk Scores (syn25575156) and Omics Scores (syn22758536)
2) Biodomain Definitions (syn25428992)
3) Pathway Commons Full Graph, v12 (syn51080932)
4) Human Protein Atlas tissue immunohistochemistry micro array data (syn51074598)
5) RNA Brain GTEx (syn51074639)
6) AMP-AD cohort pairwise partial correlations (ROSMAP, Mayo, MSBB); these are quite large! (> 6 GB), wait to
load until performing this analysis
7) SEA-AD single cell data (syn51441105)
synLogin()
Welcome, Greg Cary!NULL
# target risk scores
scores <- read_csv(synTableQuery('select * from syn25575156',
includeRowIdAndRowVersion = F)$filepath)
omics <- read_csv(synTableQuery('select * from syn22758536',
includeRowIdAndRowVersion = F)$filepath)
Extract the core NW tables
net.tbl <- igraph::as_data_frame(net)
v.attr <- tibble( na = vertex.attributes(net) ) %>%
t() %>% as_tibble(rownames = NA, .name_repair = 'unique') %>% unnest(everything()) %>%
rename_with(., ~names(vertex.attributes(net)), everything())
e.attr <- tibble( ea = edge.attributes(net) ) %>%
t() %>% as_tibble(rownames = NA, .name_repair = 'unique') %>% unnest(everything()) %>%
rename_with(., ~names(edge.attributes(net)), everything())
Specify brain tissue types
brain.tissues <- c(
"caudate", "cerebellum", "cerebral cortex", "hippocampus",
"hypothalamus", "pituitary gland", "dorsal raphe",
"choroid plexus", "substantia nigra"
)
brain.genes <- tissue.array %>% filter(
Tissue %in% brain.tissues,
Level %in% c('Low','Medium','High','Ascending','Descending'),
Reliability %in% c('Enhanced','Supported','Approved')
) %>% pull(Gene.name) %>% unique()
not.brain <- tissue.array %>% filter(
Tissue %in% brain.tissues,
Level %in% c('Not detected'),
Reliability %in% c('Enhanced','Supported','Approved')
) %>% pull(Gene.name) %>% unique()
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
brain.gtex %>%
mutate(hpa = case_when(Gene.name %in% brain.genes ~
paste0('brain (n = ', length(intersect(Gene.name, brain.genes)),' genes)'),
Gene.name %in% not.brain ~
paste0('not brain (n = ', length(intersect(Gene.name,not.brain)),' genes)'),
T ~ NA_character_)) %>%
filter(!is.na(hpa)) %>%
ggplot(aes( log10(nTPM), fill = hpa))+
geom_density(aes(color = hpa), alpha = .1)+
# geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
theme(legend.position = 'top')
# also "brain genes":
tad.deg <- scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
brain.gtex %>%
mutate(hpa = case_when(Gene.name %in% tad.deg ~ paste0('TAD DEG (n = ', length(intersect(Gene.name,tad.deg)),' genes)'),
T ~ paste0('not TAD DEG (n = ', length(setdiff(Gene.name, tad.deg)),' genes)'),
)) %>%
filter(!is.na(hpa)) %>%
ggplot(aes( log10(nTPM), fill = hpa))+
geom_density(aes(color = hpa), alpha = .1)+
# geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
theme(legend.position = 'top')
# also "brain genes":
tad.deg <- scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
brain.gtex %>%
mutate(
hpa = case_when(
Gene.name %in% brain.genes ~
paste0('HPA brain (n = ', length(intersect(Gene.name, brain.genes)),' genes)'),
Gene.name %in% tad.deg ~ paste0('TAD DEG (n = ', length(intersect(Gene.name,tad.deg)),' genes)'),
Gene.name %in% not.brain ~ paste0('HPA not brain (n = ', length(intersect(Gene.name,not.brain)),' genes)'),
T ~ paste0('other (n = ', length(setdiff(Gene.name, union(union(brain.genes, tad.deg), not.brain))) ,' genes)')
)) %>%
ggplot(aes( log10(nTPM), fill = hpa))+
geom_density(aes(color = hpa), alpha = .1)+
# geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
theme(legend.position = 'top')
t = .9
tmp = seaad.broad %>%
# filter(gene %in% not.brain) %>%
# mutate(hpa = 'not') %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain',
gene %in% not.brain ~ 'not',
gene %in% tad.deg ~ 'deg',
T ~ 'other'
)
) %>%
group_by(cellType, hpa) %>%
summarise(
exp = quantile(upperQ_exp, t),
fxn = quantile(fxn_exp, t)
)
`summarise()` has grouped output by 'cellType'. You can override using the `.groups` argument.
# bind_rows(
# tmp,
# tmp %>% filter(hpa == 'not') %>% mutate(exp = exp + .142, fxn = fxn + .142, hpa = 'new')
# ) %>%
tmp %>%
ggplot(aes(fxn, exp, group = hpa))+
geom_smooth(method = 'lm', color = 'grey20', lwd = .5, lty = 2)+
# geom_abline(intercept = 1.5294, slope = -2.562569)+
geom_abline(intercept = 2.3, slope = -2.5)+
annotate(geom = 'text', x = .65, y = 0, label = 'y = -2.5x + 2.3', hjust = 0)+
geom_point(aes(color = cellType, shape = hpa), size = 2, alpha = .5)+
labs(y = 'quantile expression', x = 'quantile fraction expressing',
subtitle = paste0('plotting quantile: ', t))
t = .9
seaad.broad %>%
# filter(gene %in% not.brain) %>%
# mutate(hpa = 'not') %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain',
gene %in% not.brain ~ 'not',
gene %in% tad.deg ~ 'deg',
T ~ 'other'
)
) %>%
filter(hpa == 'not') %>%
group_by(cellType, hpa) %>%
summarise( exp = quantile(upperQ_exp, t),
fxn = quantile(fxn_exp, t)
) %>%
lm(exp~fxn, data = .) %>%
broom::tidy()
`summarise()` has grouped output by 'cellType'. You can override using the `.groups` argument.
x = map_dfr(
seq(1.5,3.5,.01),
~{
seaad.broad %>%
filter( upperQ_exp >= -2.5 * fxn_exp + .x) %>%
summarise(
int = .x,
n.brain = length(intersect(gene, brain.genes )),
n.not = length(intersect(gene,not.brain)),
n.deg = length(intersect(gene,tad.deg)),
n.other = length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain)))),
diff.b = (n.brain - n.not),
pct.b = 100*(diff.b/n.brain),
diff.d = n.deg - n.not,
pct.d = 100*(diff.d/n.brain)
)
}
) %>%
arrange(desc(diff.b), desc(pct.d));
qplot(x$int, x$diff.b, geom = 'line')
DT::datatable(x)
t = .9
thresh = seaad.broad %>%
filter(gene %in% not.brain) %>%
group_by(broad, cellType) %>%
summarise(
exp1 = quantile(upperQ_exp, t),
fxn1 = quantile(fxn_exp, t)
) %>%
mutate(
exp = exp1+.142,
fxn = fxn1+.142,
hpa = 'new.dat'
)
`summarise()` has grouped output by 'broad'. You can override using the `.groups` argument.
seaad.broad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain',
gene %in% not.brain ~ 'not',
gene %in% tad.deg ~ 'deg',
T ~ 'other')
) %>%
ggplot(aes(fxn_exp, upperQ_exp, group = hpa))+
geom_abline(intercept = 2.25, slope = -2.5)+
# annotate(geom = 'text', x = .65, y = 0, label = 'y = -2.5x + 2.5', hjust = 0)+
geom_point(data = thresh, aes(x = fxn, y = exp), size = 2, alpha = .5)+
geom_smooth(method = 'lm', aes(color = hpa), lwd = .5, lty = 2)+
labs(y = 'expression', x = 'fraction expressing'
#, subtitle = paste0('plotting quantile: ', t)
)+
facet_wrap(~broad, scales = 'free')
t = .9
thresh = seaad.broad %>%
filter(gene %in% not.brain) %>%
group_by(broad, cellType) %>%
summarise(
exp1 = quantile(upperQ_exp, t),
fxn1 = quantile(fxn_exp, t)
) %>%
mutate(
exp = exp1+.142,
fxn = fxn1+.142,
hpa = 'new.dat'
)
`summarise()` has grouped output by 'broad'. You can override using the `.groups` argument.
seaad.broad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain',
gene %in% not.brain ~ 'not',
gene %in% tad.deg ~ 'deg',
T ~ 'other')
) %>%
# group_by(cellType) %>%
# filter(
# fxn_exp >= thresh$fxn[ which(thresh$cellType == cellType) ] %>% unique(),
# upperQ_exp >= thresh$exp[ which(thresh$cellType == cellType) ] %>% unique(),
# .preserve = T
# ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
ggplot(aes(fxn_exp, upperQ_exp, group = hpa))+
geom_abline(intercept = 2.3, slope = -2.5)+
# annotate(geom = 'text', x = .65, y = 0, label = 'y = -2.5x + 2.5', hjust = 0)+
geom_point(data = thresh, aes(x = fxn, y = exp), size = 2, alpha = .5)+
geom_smooth(method = 'lm', aes(color = hpa), lwd = .5, lty = 2)+
labs(y = 'expression', x = 'fraction expressing'
#, subtitle = paste0('plotting quantile: ', t)
)+
facet_wrap(~broad, scales = 'free')
seaad.broad %>%
ggplot(aes(upperQ_exp, fxn_exp))+
geom_bin2d(aes(fill = ..count.. ))+
viridis::scale_fill_viridis(trans = 'log10', option = 'A')+
ggtitle('no filter')+
facet_wrap(~broad)
thresh = seaad.broad %>%
filter(gene %in% not.brain) %>%
group_by(broad, cellType) %>%
summarise(
exp1 = quantile(upperQ_exp, t),
fxn1 = quantile(fxn_exp, t)
) %>%
mutate(
exp = exp1+.142,
fxn = fxn1+.142,
hpa = 'new.dat'
)
`summarise()` has grouped output by 'broad'. You can override using the `.groups` argument.
seaad.broad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain',
gene %in% tad.deg ~ 'deg',
gene %in% not.brain ~ 'not',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType) %>%
# filter(
# fxn_exp >= thresh$fxn[ which(thresh$cellType == cellType) ] %>% unique(),
# upperQ_exp >= thresh$exp[ which(thresh$cellType == cellType) ] %>% unique(),
# .preserve = T
# ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# group_by(cellType, hpa) %>%
# mutate( exp = quantile(upperQ_exp, .9),
# fxn = quantile(fxn_exp, .9)
# ) %>%
# group_by(cellType) %>%
# filter( ) %>%
# ungroup() %>%
mutate(
n = case_when(
gene %in% brain.genes ~ length(intersect(gene, brain.genes)),
gene %in% tad.deg ~ length(intersect(gene,tad.deg)),
gene %in% not.brain ~ length(intersect(gene,not.brain)),
T ~ length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain))))
)) %>%
ggplot(aes(upperQ_exp, fxn_exp))+
geom_bin2d(aes(fill = ..count.. ))+
viridis::scale_fill_viridis(trans = 'log10', option = 'A')+
facet_wrap(~broad)
seaad.broad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes(upperQ_exp, fxn_exp))+
geom_bin2d(aes(fill = ..count.. ))+
viridis::scale_fill_viridis(trans = 'log10', option = 'A')+
ggtitle('no filter')+
facet_grid(broad~hpa1)
NA
thresh = seaad.broad %>%
filter(gene %in% not.brain) %>%
group_by(broad, cellType) %>%
summarise(
exp1 = quantile(upperQ_exp, t),
fxn1 = quantile(fxn_exp, t)
) %>%
mutate(
exp = exp1+.142,
fxn = fxn1+.142,
hpa = 'new.dat'
)
`summarise()` has grouped output by 'broad'. You can override using the `.groups` argument.
seaad.broad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType) %>%
# filter(
# fxn_exp >= thresh$fxn[ which(thresh$cellType == cellType) ] %>% unique(),
# upperQ_exp >= thresh$exp[ which(thresh$cellType == cellType) ] %>% unique(),
# .preserve = T
# ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes(upperQ_exp, fxn_exp))+
geom_bin2d(aes(fill = ..count.. ))+
viridis::scale_fill_viridis(trans = 'log10', option = 'A')+
ggtitle('filtered')+
facet_grid(broad~hpa1)
NA
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad.broad %>%
# filter(is.na(group)) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ paste0('HPA brain (n = ', length(intersect(gene, brain.genes)),' genes)'),
gene %in% tad.deg ~ paste0('TAD DEG (n = ', length(intersect(gene,tad.deg)),' genes)'),
gene %in% not.brain ~ paste0('HPA not brain (n = ', length(intersect(gene,not.brain)),' genes)'),
T ~ paste0('other (n = ', length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain)))) ,' genes)')
)
) %>%
filter(!is.na(hpa)) %>%
ggplot(aes( fxn_exp, fill = hpa))+
geom_density(aes(color = hpa), alpha = .1)+
# geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,.25), ylim = c(0,5))+
theme(legend.position = 'top')
# seaad.broad %>% group_by(cellType) %>% summarise(mn = median(fraction_expressed))
seaad.broad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ paste0('HPA brain (n = ', length(intersect(gene, brain.genes)),' genes)'),
gene %in% tad.deg ~ paste0('TAD DEG (n = ', length(intersect(gene,tad.deg)),' genes)'),
gene %in% not.brain ~ paste0('HPA not brain (n = ', length(intersect(gene,not.brain)),' genes)'),
T ~ paste0('other (n = ', length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain)))) ,' genes)')
)
) %>%
group_by(cellType, hpa) %>%
mutate(top_exp = quantile(upperQ_exp, 0.75),
top_fxn = quantile(fxn_exp, 0.75)) %>%
ggplot(., aes(fxn_exp, cellType, fill = hpa)) +
geom_violin(scale = 'width', draw_quantiles = c(.5)) +
geom_point(aes(x = top_fxn, color = hpa))+
# stat_summary(fun = function(x) quantile(x,0.5), geom="point", size=2, color="red", position = 'dodge')+
# ggforce::facet_zoom(xlim = c(0,0.08), horizontal = F)
coord_cartesian(xlim = c(0,0.25))
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad.broad %>%
# filter(is.na(group)) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ paste0('HPA brain (n = ', length(intersect(gene, brain.genes)),' genes)'),
gene %in% tad.deg ~ paste0('TAD DEG (n = ', length(intersect(gene,tad.deg)),' genes)'),
gene %in% not.brain ~ paste0('HPA not brain (n = ', length(intersect(gene,not.brain)),' genes)'),
T ~ paste0('other (n = ', length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain)))) ,' genes)')
)
) %>%
filter(!is.na(hpa)) %>%
ggplot(aes( fxn_exp, fill = hpa))+
# geom_density(aes(color = hpa), alpha = .1)+
geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,.25), ylim = c(0,1e5))+
theme(legend.position = 'top')
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad.broad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes( fxn_exp, fill = hpa1))+
# geom_density(aes(color = hpa1), alpha = .1)+
geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,.25), ylim = c(0,1e5))+
theme(legend.position = 'top')
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad.broad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes( fxn_exp, color = hpa1))+
geom_density( alpha = .1)+
# geom_histogram(position = 'dodge')+
# scale_y_log10()+
geom_vline(xintercept = 0.2, lty = 2)+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,.25), ylim = c(0,1.7))+#
theme(legend.position = 'top')
# seaad.broad %>% group_by(cellType) %>% summarise(mn = median(fraction_expressed))
seaad.broad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(., aes(fxn_exp, cellType, fill = hpa1)) +
geom_violin(scale = 'width', draw_quantiles = c(.5), trim = F) +
# geom_point(aes(x = top_fxn, color = hpa))+
# stat_summary(fun = function(x) quantile(x,0.5), geom="point", size=2, color="red", position = 'dodge')+
# ggforce::facet_zoom(xlim = c(0,0.08), horizontal = F)
coord_cartesian(xlim = c(0,1))
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad.broad %>%
# filter(is.na(group)) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ paste0('HPA brain (n = ', length(intersect(gene, brain.genes)),' genes)'),
gene %in% tad.deg ~ paste0('TAD DEG (n = ', length(intersect(gene,tad.deg)),' genes)'),
gene %in% not.brain ~ paste0('HPA not brain (n = ', length(intersect(gene,not.brain)),' genes)'),
T ~ paste0('other (n = ', length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain)))) ,' genes)')
)
) %>%
filter(!is.na(hpa)) %>%
ggplot(aes( (upperQ_exp), fill = hpa))+
geom_density(aes(color = hpa), alpha = .1)+
# geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,2), ylim = c(0,1.3) )+
theme(legend.position = 'top')
# seaad.broad %>% group_by(cellType) %>% summarise(mn = median(fraction_expressed))
seaad.broad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ paste0('HPA brain (n = ', length(intersect(gene, brain.genes)),' genes)'),
gene %in% tad.deg ~ paste0('TAD DEG (n = ', length(intersect(gene,tad.deg)),' genes)'),
gene %in% not.brain ~ paste0('HPA not brain (n = ', length(intersect(gene,not.brain)),' genes)'),
T ~ paste0('other (n = ', length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain)))) ,' genes)')
)
) %>%
group_by(cellType, hpa) %>%
mutate(top_exp = quantile(upperQ_exp, 0.75),
top_fxn = quantile(fxn_exp, 0.75)) %>%
ggplot(., aes(upperQ_exp, cellType, fill = hpa)) +
geom_violin(scale = 'width', draw_quantiles = c(.5)) +
# geom_point(aes(x = top_exp, color = hpa))+
# stat_summary(fun = function(x) quantile(x,0.5), geom="point", size=2, color="red", position = 'dodge')+
# ggforce::facet_zoom(xlim = c(0,0.08), horizontal = F)
coord_cartesian(xlim = c(0,6))
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad.broad %>%
# filter(is.na(group)) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ paste0('HPA brain (n = ', length(intersect(gene, brain.genes)),' genes)'),
gene %in% tad.deg ~ paste0('TAD DEG (n = ', length(intersect(gene,tad.deg)),' genes)'),
gene %in% not.brain ~ paste0('HPA not brain (n = ', length(intersect(gene,not.brain)),' genes)'),
T ~ paste0('other (n = ', length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain)))) ,' genes)')
)
) %>%
# group_by(cellType, hpa) %>%
# mutate(md_exp = quantile(upperQ_exp, 0.5),
# fivepct_exp = quantile(upperQ_exp, 0.95),
# fivepct_fxn = quantile(fxn_exp, .5)) %>%
filter(!is.na(hpa)) %>%
ggplot(aes( (upperQ_exp), fill = hpa))+
# geom_density(aes(color = hpa), alpha = .1)+
geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,2), ylim = c(0,3.5e4) )+
theme(legend.position = 'top')
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad.broad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes( upperQ_exp, fill = hpa1))+
# geom_density(aes(color = hpa), alpha = .1)+
geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,2), ylim = c(0,3.5e4) )+
theme(legend.position = 'top')
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad.broad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes( upperQ_exp, color = hpa1 ))+
geom_density( alpha = .1)+
# geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(color = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,2), ylim = c(0,1.3) )+
theme(legend.position = 'top')
# seaad.broad %>% group_by(cellType) %>% summarise(mn = median(fraction_expressed))
seaad.broad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(., aes(upperQ_exp, cellType, fill = hpa1)) +
geom_violin(scale = 'width', draw_quantiles = c(.5), trim = F) +
# geom_point(aes(x = top_exp, color = hpa))+
# stat_summary(fun = function(x) quantile(x,0.5), geom="point", size=2, color="red", position = 'dodge')+
# ggforce::facet_zoom(xlim = c(0,0.08), horizontal = F)
coord_cartesian(xlim = c(0,6))
# McKenzie brain cell type specfic expression PMID 29892006
url='https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5995803/bin/41598_2018_27293_MOESM2_ESM.xlsx'
httr::GET(url, httr::write_disk(tf <- tempfile(fileext = ".xlsx")))
Response [https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5995803/bin/41598_2018_27293_MOESM2_ESM.xlsx]
Date: 2023-06-02 16:20
Status: 200
Content-Type: application/octet-stream
Size: 3.23 MB
<ON DISK> /tmp/RtmpiSkVJg/file3f71186c6bc9.xlsx
# readxl::excel_sheets(tf)
# [1] "top_all_enrich" "top_human_enrich" "top_mouse_enrich"
# [4] "top_all_expression" "top_human_expression" "top_mouse_expression"
# [7] "top_all_specificity" "top_human_specificity" "top_mouse_specificity"
ct_spec = readxl::read_xlsx(tf,sheet = 'top_all_enrich', skip = 1)
ct_spec %>%
group_by(Celltype) %>%
summarise(mn = median(grand_mean), sd = sd(grand_mean), mn_sd = mn+sd) %>%
arrange(desc(mn_sd))
dups = ct_spec$gene[which(duplicated(ct_spec$gene))] %>% unique()
ct_spec %>%
filter(!(gene %in% dups)) %>%
group_by(Celltype) %>%
mutate(mn = median(grand_mean), sd = sd(grand_mean), mn_sd = mn+sd) %>%
ggplot(aes(grand_mean, color = Celltype))+
geom_density()+
geom_vline(aes(xintercept = mn_sd, color = Celltype))
# seaad.broad = read_csv('/projects/carter-lab/caryg/seaAD/data/seaAD_broad_summaryCounts.csv')
# seaad.broad = read_csv('/projects/carter-lab/caryg/seaAD/data/seaAD_broad_upperQ_summaryCounts.csv')
ct_spec %>%
filter(!(gene %in% dups)) %>%
group_by(Celltype) %>%
mutate(mn = median(grand_mean), sd = sd(grand_mean), mn_sd = mn+sd) %>%
filter(grand_mean > mn_sd) %>%
select(gene, Celltype) %>%
distinct() %>%
inner_join(seaad.broad, ., by = 'gene') %>%
# filter( upperQ_exp >= -2.5 * fxn_exp + 2.5) %>%
ggplot(., aes(broad, upperQ_exp, fill = Celltype)) +
geom_violin(scale = 'width', draw_quantiles = c(.5), trim = F)
# seaad.broad = read_csv('/projects/carter-lab/caryg/seaAD/data/seaAD_broad_summaryCounts.csv')
# seaad.broad = read_csv('/projects/carter-lab/caryg/seaAD/data/seaAD_broad_upperQ_summaryCounts.csv')
ct_spec %>%
filter(!(gene %in% dups)) %>%
group_by(Celltype) %>%
mutate(mn = median(grand_mean), sd = sd(grand_mean), mn_sd = mn+sd) %>%
filter(grand_mean > mn_sd) %>%
select(gene, Celltype) %>%
distinct() %>%
inner_join(seaad.broad, ., by = 'gene') %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
ggplot(., aes(broad, upperQ_exp, fill = Celltype)) +
geom_violin(scale = 'width', draw_quantiles = c(.5), trim = F)
seaad = read_csv('/projects/carter-lab/caryg/seaAD/data/seaAD_upperQ_summaryCounts.csv')
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────
cols(
broad = col_character(),
cellType = col_character(),
group = col_character(),
gene = col_character(),
fxn_exp = col_double(),
upperQ_exp = col_double()
)
t = .9
seaad %>%
# filter(gene %in% not.brain) %>%
# mutate(hpa = 'not') %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain',
gene %in% not.brain ~ 'not',
gene %in% tad.deg ~ 'deg',
T ~ 'other'
)
) %>%
filter(hpa == 'not') %>%
group_by(cellType, hpa) %>%
summarise( exp = quantile(upperQ_exp, t),
fxn = quantile(fxn_exp, t)
) %>%
lm(exp~fxn, data = .) %>%
broom::tidy()
`summarise()` has grouped output by 'cellType'. You can override using the `.groups` argument.
x = map_dfr(
seq(1.5,3,.1),
~{
seaad %>%
filter( upperQ_exp >= -2.5 * fxn_exp + .x) %>%
summarise(
int = .x,
n.brain = length(intersect(gene, brain.genes )),
n.not = length(intersect(gene,not.brain)),
n.deg = length(intersect(gene,tad.deg)),
n.other = length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain)))),
diff.b = (n.brain - n.not),
pct.b = 100*(diff.b/n.brain),
diff.d = n.deg - n.not,
pct.d = 100*(diff.d/n.brain)
)
}
) %>%
arrange(desc(diff.b), desc(pct.d));
qplot(x$int, x$diff.b, geom = 'line')
DT::datatable(x)
t = .9
thresh = seaad %>%
filter(gene %in% not.brain) %>%
group_by(broad, cellType) %>%
summarise(
exp1 = quantile(upperQ_exp, t),
fxn1 = quantile(fxn_exp, t)
) %>%
mutate(
exp = exp1+.142,
fxn = fxn1+.142,
hpa = 'new.dat'
)
`summarise()` has grouped output by 'broad'. You can override using the `.groups` argument.
seaad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain',
gene %in% not.brain ~ 'not',
gene %in% tad.deg ~ 'deg',
T ~ 'other')
) %>%
ggplot(aes(fxn_exp, upperQ_exp, group = hpa))+
geom_abline(intercept = 2.3, slope = -2.5)+
# annotate(geom = 'text', x = .65, y = 0, label = 'y = -2.5x + 2.5', hjust = 0)+
geom_point(data = thresh, aes(x = fxn, y = exp), size = 2, alpha = .5)+
geom_smooth(method = 'lm', aes(color = hpa), lwd = .5, lty = 2)+
labs(y = 'expression', x = 'fraction expressing'
#, subtitle = paste0('plotting quantile: ', t)
)+
facet_wrap(~broad, scales = 'free')
t = .9
thresh = seaad %>%
filter(gene %in% not.brain) %>%
group_by(broad, cellType) %>%
summarise(
exp1 = quantile(upperQ_exp, t),
fxn1 = quantile(fxn_exp, t)
) %>%
mutate(
exp = exp1+.142,
fxn = fxn1+.142,
hpa = 'new.dat'
)
`summarise()` has grouped output by 'broad'. You can override using the `.groups` argument.
seaad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain',
gene %in% not.brain ~ 'not',
gene %in% tad.deg ~ 'deg',
T ~ 'other')
) %>%
# group_by(cellType) %>%
# filter(
# fxn_exp >= thresh$fxn[ which(thresh$cellType == cellType) ] %>% unique(),
# upperQ_exp >= thresh$exp[ which(thresh$cellType == cellType) ] %>% unique(),
# .preserve = T
# ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
ggplot(aes(fxn_exp, upperQ_exp, group = hpa))+
geom_abline(intercept = 2.3, slope = -2.5)+
# annotate(geom = 'text', x = .65, y = 0, label = 'y = -2.5x + 2.5', hjust = 0)+
geom_point(data = thresh, aes(x = fxn, y = exp), size = 2, alpha = .5)+
geom_smooth(method = 'lm', aes(color = hpa), lwd = .5, lty = 2)+
labs(y = 'expression', x = 'fraction expressing'
#, subtitle = paste0('plotting quantile: ', t)
)+
facet_wrap(~broad, scales = 'free')
seaad %>%
ggplot(aes(upperQ_exp, fxn_exp))+
geom_bin2d(aes(fill = ..count.. ))+
viridis::scale_fill_viridis(trans = 'log10', option = 'A')+
ggtitle('no filter')+
facet_wrap(~broad)
thresh = seaad %>%
filter(gene %in% not.brain) %>%
group_by(broad, cellType) %>%
summarise(
exp1 = quantile(upperQ_exp, t),
fxn1 = quantile(fxn_exp, t)
) %>%
mutate(
exp = exp1+.142,
fxn = fxn1+.142,
hpa = 'new.dat'
)
`summarise()` has grouped output by 'broad'. You can override using the `.groups` argument.
seaad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain',
gene %in% tad.deg ~ 'deg',
gene %in% not.brain ~ 'not',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType) %>%
# filter(
# fxn_exp >= thresh$fxn[ which(thresh$cellType == cellType) ] %>% unique(),
# upperQ_exp >= thresh$exp[ which(thresh$cellType == cellType) ] %>% unique(),
# .preserve = T
# ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# group_by(cellType, hpa) %>%
# mutate( exp = quantile(upperQ_exp, .9),
# fxn = quantile(fxn_exp, .9)
# ) %>%
# group_by(cellType) %>%
# filter( ) %>%
# ungroup() %>%
ggplot(aes(upperQ_exp, fxn_exp))+
geom_bin2d(aes(fill = ..count.. ))+
viridis::scale_fill_viridis(trans = 'log10', option = 'A')+
facet_wrap(~broad)
seaad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes(upperQ_exp, fxn_exp))+
geom_bin2d(aes(fill = ..count.. ))+
viridis::scale_fill_viridis(trans = 'log10', option = 'A')+
ggtitle('no filter')+
facet_grid(broad~hpa1)
NA
thresh = seaad %>%
filter(gene %in% not.brain) %>%
group_by(broad, cellType) %>%
summarise(
exp1 = quantile(upperQ_exp, t),
fxn1 = quantile(fxn_exp, t)
) %>%
mutate(
exp = exp1+.142,
fxn = fxn1+.142,
hpa = 'new.dat'
)
`summarise()` has grouped output by 'broad'. You can override using the `.groups` argument.
seaad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType) %>%
# filter(
# fxn_exp >= thresh$fxn[ which(thresh$cellType == cellType) ] %>% unique(),
# upperQ_exp >= thresh$exp[ which(thresh$cellType == cellType) ] %>% unique(),
# .preserve = T
# ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes(upperQ_exp, fxn_exp))+
geom_bin2d(aes(fill = ..count.. ))+
viridis::scale_fill_viridis(trans = 'log10', option = 'A')+
ggtitle('filtered')+
facet_grid(broad~hpa1)
NA
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad %>%
# filter(is.na(group)) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ paste0('HPA brain (n = ', length(intersect(gene, brain.genes)),' genes)'),
gene %in% tad.deg ~ paste0('TAD DEG (n = ', length(intersect(gene,tad.deg)),' genes)'),
gene %in% not.brain ~ paste0('HPA not brain (n = ', length(intersect(gene,not.brain)),' genes)'),
T ~ paste0('other (n = ', length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain)))) ,' genes)')
)
) %>%
filter(!is.na(hpa)) %>%
ggplot(aes( fxn_exp, fill = hpa))+
geom_density(aes(color = hpa), alpha = .1)+
# geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,.25), ylim = c(0,5))+
theme(legend.position = 'top')
# seaad %>% group_by(cellType) %>% summarise(mn = median(fraction_expressed))
seaad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ paste0('HPA brain (n = ', length(intersect(gene, brain.genes)),' genes)'),
gene %in% tad.deg ~ paste0('TAD DEG (n = ', length(intersect(gene,tad.deg)),' genes)'),
gene %in% not.brain ~ paste0('HPA not brain (n = ', length(intersect(gene,not.brain)),' genes)'),
T ~ paste0('other (n = ', length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain)))) ,' genes)')
)
) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.75)) %>%
ggplot(., aes(fxn_exp, broad, fill = hpa)) +
geom_violin(scale = 'width', draw_quantiles = c(.5)) +
# geom_point(aes(x = top_fxn, color = hpa))+
# stat_summary(fun = function(x) quantile(x,0.5), geom="point", size=2, color="red", position = 'dodge')+
# ggforce::facet_zoom(xlim = c(0,0.08), horizontal = F)
coord_cartesian(xlim = c(0,0.25))
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad %>%
# filter(is.na(group)) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ paste0('HPA brain (n = ', length(intersect(gene, brain.genes)),' genes)'),
gene %in% tad.deg ~ paste0('TAD DEG (n = ', length(intersect(gene,tad.deg)),' genes)'),
gene %in% not.brain ~ paste0('HPA not brain (n = ', length(intersect(gene,not.brain)),' genes)'),
T ~ paste0('other (n = ', length(unique(setdiff(gene, union(union(brain.genes, tad.deg), not.brain)))) ,' genes)')
)
) %>%
filter(!is.na(hpa)) %>%
ggplot(aes( fxn_exp, fill = hpa))+
# geom_density(aes(color = hpa), alpha = .1)+
geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,.25), ylim = c(0,1e5))+
theme(legend.position = 'top')
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes( fxn_exp, fill = hpa1))+
# geom_density(aes(color = hpa1), alpha = .1)+
geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,.25), ylim = c(0,1e5))+
theme(legend.position = 'top')
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes( fxn_exp, color = hpa1))+
geom_density( alpha = .1)+
# geom_histogram(position = 'dodge')+
# scale_y_log10()+
geom_vline(xintercept = 0.2, lty = 2)+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,.25), ylim = c(0,1.7))+#
theme(legend.position = 'top')
# seaad %>% group_by(cellType) %>% summarise(mn = median(fraction_expressed))
seaad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(., aes(fxn_exp, broad, fill = hpa1)) +
geom_violin(scale = 'width', draw_quantiles = c(.5), trim = F) +
# geom_point(aes(x = top_fxn, color = hpa))+
# stat_summary(fun = function(x) quantile(x,0.5), geom="point", size=2, color="red", position = 'dodge')+
# ggforce::facet_zoom(xlim = c(0,0.08), horizontal = F)
coord_cartesian(xlim = c(0,1))
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad %>%
# filter(is.na(group)) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes( (upperQ_exp), fill = hpa1))+
geom_density(aes(color = hpa), alpha = .1)+
# geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,2), ylim = c(0,1.3) )+
theme(legend.position = 'top')
# seaad %>% group_by(cellType) %>% summarise(mn = median(fraction_expressed))
seaad %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.75)) %>%
ggplot(., aes(upperQ_exp, broad, fill = hpa1)) +
geom_violin(scale = 'width', draw_quantiles = c(.5)) +
# geom_point(aes(x = top_exp, color = hpa))+
# stat_summary(fun = function(x) quantile(x,0.5), geom="point", size=2, color="red", position = 'dodge')+
# ggforce::facet_zoom(xlim = c(0,0.08), horizontal = F)
coord_cartesian(xlim = c(0,6))
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad %>%
# filter(is.na(group)) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes( (upperQ_exp), fill = hpa1))+
# geom_density(aes(color = hpa), alpha = .1)+
geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,2), ylim = c(0,3.5e4) )+
theme(legend.position = 'top')
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes( upperQ_exp, fill = hpa1))+
# geom_density(aes(color = hpa), alpha = .1)+
geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(fill = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,2), ylim = c(0,3.5e4) )+
theme(legend.position = 'top')
# also "brain genes":
# scores %>% filter(isScored_omics == 'Y', OmicsScore > 0) %>% pull(GeneName) %>% unique()
seaad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(aes( upperQ_exp, color = hpa1 ))+
geom_density( alpha = .1)+
# geom_histogram(position = 'dodge')+
# scale_y_log10()+
scale_fill_discrete('')+scale_color_discrete('')+
guides(color = guide_legend(nrow=2))+
ggforce::facet_zoom(xlim = c(0,2), ylim = c(0,1.3) )+
theme(legend.position = 'top')
# seaad %>% group_by(cellType) %>% summarise(mn = median(fraction_expressed))
seaad %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
mutate(
hpa = case_when(
gene %in% brain.genes ~ 'brain.genes',
gene %in% tad.deg ~ 'tad.deg',
gene %in% not.brain ~ 'not.brain',
T ~ 'other')
) %>%
filter(!is.na(hpa)) %>%
# group_by(cellType, hpa) %>%
# mutate(top_exp = quantile(upperQ_exp, 0.75),
# top_fxn = quantile(fxn_exp, 0.95)) %>%
# group_by(cellType) %>%
# filter( upperQ_exp > unique(top_exp[hpa == 'not.brain']) ,
# fxn_exp > unique(top_fxn[hpa == 'not.brain']) ) %>%
# ungroup() %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3) %>%
# filter( upperQ_exp >= 2 | (upperQ_exp >= 1.33 & fxn_exp >= 0.2) ) %>%
group_by(hpa) %>%
mutate(
n = length(unique(gene)),
hpa1 = paste0(hpa, ', ', n)
) %>%
ggplot(., aes(upperQ_exp, broad, fill = hpa1)) +
geom_violin(scale = 'width', draw_quantiles = c(.5), trim = F) +
# geom_point(aes(x = top_exp, color = hpa))+
# stat_summary(fun = function(x) quantile(x,0.5), geom="point", size=2, color="red", position = 'dodge')+
# ggforce::facet_zoom(xlim = c(0,0.08), horizontal = F)
coord_cartesian(xlim = c(0,6))
seaad.genes <- seaad %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3 ) %>%
pull(gene) %>%
unique()
tot.nodes = v.attr$name
filt.nodes = union(seaad.genes, tad.deg) %>% union(., brain.genes) %>% intersect(.,v.attr$name)
grid::grid.newpage()
grid::grid.draw(
VennDiagram::venn.diagram(
x = list(
`PathCommons_NW\n(19087)` = v.attr$name %>% .[!is.na(.)],
`HumanProtAtlas_brain\n(4140)` = brain.genes %>% .[!is.na(.)],
`SeaAD\n(10594)` = seaad.genes %>% .[!is.na(.)],
`TreatAD_DEG\n(11910)` = tad.deg %>% .[!is.na(.)]
),
filename = NULL,
force.unique = T,
lty = 0, alpha = .3 ,
fill = c('light blue','yellow', 'green','purple'),
sub = paste0('# Pathway Commons nodes: ', length(tot.nodes),
'\n# filtered nodes: ', length(filt.nodes),
' (', signif( 100*length(filt.nodes)/length(tot.nodes), digits = 4), '%)' ),
cat.cex = .8,
ext.pos = 180,
ext.dist = -.1
)
)
Specify the directed-edge types to filter the Pathway Commons graph
directed_edge_types = c("catalysis-precedes",
"controls-expression-of",
"controls-phosphorylation-of",
"controls-state-change-of",
"controls-transport-of"
)
# dir.net <- graph_from_data_frame(d = net.tbl %>% filter(interaction %in% directed_edge_types), directed = T)
How many edges from each source?
bind_cols(
sources = str_split(net.tbl$sources,',') %>% unlist %>% unique,
n_edge = map_dbl(
str_split(net.tbl$sources,',') %>% unlist %>% unique,
~ net.tbl %>% filter(grepl(.x, sources)) %>% nrow() ),
any_directed = map_lgl(
str_split(net.tbl$sources,',') %>% unlist %>% unique,
~ net.tbl %>%
mutate(directed = if_else(interaction %in% directed_edge_types, 'dir','undir') ) %>%
filter(grepl(.x, sources)) %>% pull(directed) %>% any(. == 'dir')
)
) %>%
mutate(any_directed = if_else(is.na(any_directed), FALSE, TRUE),
any_directed = factor(any_directed, levels=c('TRUE','FALSE'))) %>%
arrange(desc(n_edge)) %>% mutate(sources = fct_relevel(sources, sources)) %>%
ggplot(aes(sources, n_edge)) + geom_bar(stat = 'identity', aes(fill = any_directed)) +
# scale_y_log10()+
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
, legend.position = 'top')
Which edge sources are (generally) well supported?
bind_cols(
sources = str_split(net.tbl$sources,',') %>% unlist %>% unique,
n_edge = map_dbl(
str_split(net.tbl$sources,',') %>% unlist %>% unique,
~ net.tbl %>% filter(grepl(.x, sources)) %>% nrow() )
, median_evidence_per_edge = map_dbl(
str_split(net.tbl$sources,',') %>% unlist %>% unique,
~ net.tbl %>% filter(grepl(.x, sources)) %>% pull(n_edge_evidence) %>% median() )
, any_directed = map_lgl(
str_split(net.tbl$sources,',') %>% unlist %>% unique,
~ net.tbl %>%
mutate(directed = if_else(interaction %in% directed_edge_types, 'dir','undir') ) %>%
filter(grepl(.x, sources)) %>% pull(directed) %>% any(. == 'dir') )
) %>%
mutate(any_directed = if_else(is.na(any_directed), FALSE, TRUE),
any_directed = factor(any_directed, levels=c('TRUE','FALSE'))) %>%
arrange(desc(n_edge)) %>% mutate(sources = fct_relevel(sources, sources)) %>%
ggplot(aes(n_edge, median_evidence_per_edge)) +
geom_point( aes(color = any_directed )) +
scale_x_log10()+
theme(legend.position = 'top')+
ggrepel::geom_label_repel(aes(label = sources), size = 3)
# theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
Distribution of evidence support for edges
net.tbl %>%
mutate(directed = if_else(interaction %in% directed_edge_types, 'dir','undir') ) %>%
ggplot(aes(n_edge_evidence))+
stat_ecdf(geom = 'line', aes(color = directed))+
# geom_density(aes(fill = directed))+
# geom_histogram(aes(fill = directed), position = position_dodge())+
# scale_y_log10()+
scale_x_log10()+
geom_vline(xintercept = 2, lty = 2, lwd = .5)+
labs(y = 'fraction of edges')
Distribution of evidence support for edges
net.tbl %>%
mutate(directed = if_else(interaction %in% directed_edge_types, 'dir','undir') ) %>%
ggplot(aes(n_edge_types))+
stat_ecdf(geom = 'line', aes(color = directed))+
# geom_density(aes(fill = directed))+
# geom_histogram(aes(fill = directed), position = position_dodge())+
# scale_y_log10()+
# scale_x_log10()+
geom_vline(xintercept = 2, lty = 2, lwd = .5)+
labs(y = 'fraction of edges')
Distribution of evidence support for edges
net.tbl %>%
mutate(directed = if_else(interaction %in% directed_edge_types, 'dir','undir') ) %>%
ggplot(aes(n_source))+
stat_ecdf(geom = 'line', aes(color = directed))+
# geom_density(aes(fill = directed))+
# geom_histogram(aes(fill = directed), position = position_dodge())+
# scale_y_log10()+
scale_x_log10()+
geom_vline(xintercept = 2, lty = 2, lwd = .5)+
labs(y = 'fraction of edges')
Distribution of evidence support for edges
net.tbl %>%
mutate(directed = if_else(interaction %in% directed_edge_types, 'dir','undir') ) %>%
filter(n_edge_evidence > 1
| (n_edge_evidence == 1 & n_edge_types > 2 & n_source > 2)) %>%
ggplot(aes(n_edge))+
# stat_ecdf(geom = 'line', aes(color = directed))+
geom_histogram(color = 'grey20', fill = 'grey80')+
# scale_x_log10()+
# geom_vline(xintercept = 2, lty = 2, lwd = .5)+
labs()
Distribution of evidence support for edges
net.tbl %>%
# mutate(directed = if_else(interaction %in% directed_edge_types, 'dir','undir') ) %>%
ggplot(aes( n_edge_types, n_edge_evidence ))+
geom_point()+
geom_smooth(method = 'lm')+
scale_y_log10()+
# scale_x_log10()+
# geom_vline(xintercept = 2, lty = 2, lwd = .5)+
labs()
Dropping this for the moment because: 1. the current pcor calculations would need to be recalculated on a tissue specific basis 2. the data are large, unwieldy, and of uncertain utility
May return at a later time.
# specify gene lists
hpa.brain <- tissue.array %>%
filter(
Tissue %in% brain.tissues,
Level %in% c('Low','Medium','High','Ascending','Descending'),
Reliability %in% c('Enhanced','Supported','Approved') ) %>%
pull(Gene.name) %>% unique()
tad.deg <- scores %>%
filter(isScored_omics == 'Y', OmicsScore > 0) %>%
pull(GeneName) %>% unique()
seaad.expr <- seaad %>%
filter( upperQ_exp >= -2.5 * fxn_exp + 2.3 ) %>%
pull(gene) %>% unique()
brain.genes <- union(hpa.brain, tad.deg) %>% union(., seaad.expr)
# pull NW stats based on filter
nw.stats <- tibble(
dir = c(
'undirected'
,'directed'
,'undirected'
,'directed'
,'undirected'
,'directed'
,'undirected'
,'directed'
,'undirected'
,'directed'
,'undirected'
,'directed'
,'undirected'
,'directed'
),
filt = c(
'none'
,'none'
,'edge_evidence > 1'
,'edge_evidence > 1'
,'HPA_brain'
,'HPA_brain'
,'omics_detect'
,'omics_detect'
,'seaAD_detect'
,'seaAD_detect'
,'all_expr'
,'all_expr'
,'expr+evidence'
,'expr+evidence'
),
nV = c(
net.tbl %>%
graph_from_data_frame %>% V %>% length
, net.tbl %>%
filter(interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% V %>% length
, net.tbl %>%
filter(n_edge_evidence > 1) %>%
graph_from_data_frame %>% V %>% length
, net.tbl %>%
filter(n_edge_evidence > 1,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% V %>% length
, net.tbl %>%
filter(from %in% hpa.brain & to %in% hpa.brain) %>%
graph_from_data_frame %>% V %>% length
, net.tbl %>%
filter(from %in% hpa.brain & to %in% hpa.brain,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% V %>% length
, net.tbl %>%
filter(from %in% tad.deg & to %in% tad.deg) %>%
graph_from_data_frame %>% V %>% length
, net.tbl %>%
filter(from %in% tad.deg & to %in% tad.deg,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% V %>% length
, net.tbl %>%
filter(from %in% seaad.expr & to %in% seaad.expr) %>%
graph_from_data_frame %>% V %>% length
, net.tbl %>%
filter(from %in% seaad.expr & to %in% seaad.expr,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% V %>% length
, net.tbl %>%
filter(from %in% brain.genes & to %in% brain.genes) %>%
graph_from_data_frame %>% V %>% length
, net.tbl %>%
filter(from %in% brain.genes & to %in% brain.genes,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% V %>% length
, net.tbl %>%
filter(n_edge_evidence > 1,
from %in% brain.genes & to %in% brain.genes) %>%
graph_from_data_frame %>% V %>% length
, net.tbl %>%
filter(n_edge_evidence > 1,
from %in% brain.genes & to %in% brain.genes,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% V %>% length
),
nE = c(
net.tbl %>%
graph_from_data_frame %>% E %>% length
, net.tbl %>%
filter(interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% E %>% length
, net.tbl %>%
filter(n_edge_evidence > 1) %>%
graph_from_data_frame %>% E %>% length
, net.tbl %>%
filter(n_edge_evidence > 1,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% E %>% length
, net.tbl %>%
filter(from %in% hpa.brain & to %in% hpa.brain) %>%
graph_from_data_frame %>% E %>% length
, net.tbl %>%
filter(from %in% hpa.brain & to %in% hpa.brain,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% E %>% length
, net.tbl %>%
filter(from %in% tad.deg & to %in% tad.deg) %>%
graph_from_data_frame %>% E %>% length
, net.tbl %>%
filter(from %in% tad.deg & to %in% tad.deg,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% E %>% length
, net.tbl %>%
filter(from %in% seaad.expr & to %in% seaad.expr) %>%
graph_from_data_frame %>% E %>% length
, net.tbl %>%
filter(from %in% seaad.expr & to %in% seaad.expr,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% E %>% length
, net.tbl %>%
filter(from %in% brain.genes & to %in% brain.genes) %>%
graph_from_data_frame %>% E %>% length
, net.tbl %>%
filter(from %in% brain.genes & to %in% brain.genes,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% E %>% length
, net.tbl %>%
filter(n_edge_evidence > 1,
from %in% brain.genes & to %in% brain.genes) %>%
graph_from_data_frame %>% E %>% length
, net.tbl %>%
filter(n_edge_evidence > 1,
from %in% brain.genes & to %in% brain.genes,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% E %>% length
),
avg_path_length = c(
net.tbl %>%
graph_from_data_frame %>% average.path.length()
, net.tbl %>%
filter(interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% average.path.length(directed = T)
, net.tbl %>%
filter(n_edge_evidence > 1) %>%
graph_from_data_frame %>% average.path.length()
, net.tbl %>%
filter(n_edge_evidence > 1,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% average.path.length(directed = T)
, net.tbl %>%
filter(from %in% hpa.brain & to %in% hpa.brain) %>%
graph_from_data_frame %>% average.path.length()
, net.tbl %>%
filter(from %in% hpa.brain & to %in% hpa.brain,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% average.path.length(directed = T)
, net.tbl %>%
filter(from %in% tad.deg & to %in% tad.deg) %>%
graph_from_data_frame %>% average.path.length()
, net.tbl %>%
filter(from %in% tad.deg & to %in% tad.deg,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% average.path.length(directed = T)
, net.tbl %>%
filter(from %in% seaad.expr & to %in% seaad.expr) %>%
graph_from_data_frame %>% average.path.length()
, net.tbl %>%
filter(from %in% seaad.expr & to %in% seaad.expr,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% average.path.length(directed = T)
, net.tbl %>%
filter(from %in% brain.genes & to %in% brain.genes) %>%
graph_from_data_frame %>% average.path.length()
, net.tbl %>%
filter(from %in% brain.genes & to %in% brain.genes,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% average.path.length(directed = T)
, net.tbl %>%
filter(n_edge_evidence > 1,
from %in% brain.genes & to %in% brain.genes) %>%
graph_from_data_frame %>% average.path.length()
, net.tbl %>%
filter(n_edge_evidence > 1,
from %in% brain.genes & to %in% brain.genes,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% average.path.length(directed = T)
),
assortativity_coef = c(
net.tbl %>%
graph_from_data_frame %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(n_edge_evidence > 1) %>%
graph_from_data_frame %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(n_edge_evidence > 1,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(from %in% hpa.brain & to %in% hpa.brain) %>%
graph_from_data_frame %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(from %in% hpa.brain & to %in% hpa.brain,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(from %in% tad.deg & to %in% tad.deg) %>%
graph_from_data_frame %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(from %in% tad.deg & to %in% tad.deg,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(from %in% seaad.expr & to %in% seaad.expr) %>%
graph_from_data_frame(directed=T) %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(from %in% seaad.expr & to %in% seaad.expr,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(from %in% brain.genes & to %in% brain.genes) %>%
graph_from_data_frame %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(from %in% brain.genes & to %in% brain.genes,
interaction %in% directed_edge_types) %>%
graph_from_data_frame %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(n_edge_evidence > 1,
from %in% brain.genes & to %in% brain.genes) %>%
graph_from_data_frame %>% assortativity(., types1 = V(.))
, net.tbl %>%
filter(n_edge_evidence > 1,
from %in% brain.genes & to %in% brain.genes,
interaction %in% directed_edge_types) %>%
graph_from_data_frame %>% assortativity(., types1 = V(.))
),
connected_components = c(
net.tbl %>%
graph_from_data_frame %>% no.clusters()
, net.tbl %>%
filter(interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% no.clusters()
, net.tbl %>%
filter(n_edge_evidence > 1) %>%
graph_from_data_frame %>% no.clusters()
, net.tbl %>%
filter(n_edge_evidence > 1,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% no.clusters()
, net.tbl %>%
filter(from %in% hpa.brain & to %in% hpa.brain) %>%
graph_from_data_frame %>% no.clusters()
, net.tbl %>%
filter(from %in% hpa.brain & to %in% hpa.brain,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% no.clusters()
, net.tbl %>%
filter(from %in% tad.deg & to %in% tad.deg) %>%
graph_from_data_frame %>% no.clusters()
, net.tbl %>%
filter(from %in% tad.deg & to %in% tad.deg,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% no.clusters()
, net.tbl %>%
filter(from %in% seaad.expr & to %in% seaad.expr) %>%
graph_from_data_frame %>% no.clusters()
, net.tbl %>%
filter(from %in% seaad.expr & to %in% seaad.expr,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% no.clusters()
, net.tbl %>%
filter(from %in% brain.genes & to %in% brain.genes) %>%
graph_from_data_frame %>% no.clusters()
, net.tbl %>%
filter(from %in% brain.genes & to %in% brain.genes,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% no.clusters()
, net.tbl %>%
filter(n_edge_evidence > 1,
from %in% brain.genes & to %in% brain.genes) %>%
graph_from_data_frame %>% no.clusters()
, net.tbl %>%
filter(n_edge_evidence > 1,
from %in% brain.genes & to %in% brain.genes,
interaction %in% directed_edge_types) %>%
graph_from_data_frame(directed=T) %>% no.clusters()
)
)
nw.stats %>%
pivot_longer(cols = c(nV,nE,avg_path_length,assortativity_coef,connected_components),
names_to = 'prop', values_to = 'val') %>%
mutate(prop = factor(prop, levels = c('nV','nE', 'avg_path_length','assortativity_coef','connected_components'))
, dir = factor(dir, levels = c('undirected','directed'))
, filt = factor(filt, levels = c('none','edge_evidence > 1','HPA_brain','omics_detect','seaAD_detect',
'all_expr','expr+evidence'))
) %>%
ggplot(aes(filt, val, fill = dir)) +
geom_bar(stat = 'identity', position = 'dodge')+
theme(legend.position = 'top',
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))+
facet_wrap(~prop, scales = 'free_y', ncol = 2)
save.image(
paste0(
Sys.Date() %>% str_replace_all('-','_'),
'_',
'base_nw_filtering.Rdata')
)
sessionInfo()
R version 4.2.1 (2022-06-23)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.5 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
[6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices datasets utils methods base
other attached packages:
[1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.4 purrr_0.3.4 readr_1.4.0 tidyr_1.1.2 tibble_3.0.6 ggplot2_3.3.3 tidyverse_1.3.0
[10] tidygraph_1.2.0 igraph_1.2.6 synapser_0.11.7
loaded via a namespace (and not attached):
[1] nlme_3.1-157 fs_1.5.0 lubridate_1.7.9.2 httr_1.4.2 rprojroot_2.0.2 tools_4.2.1
[7] backports_1.2.1 utf8_1.1.4 R6_2.5.0 DT_0.17 mgcv_1.8-34 DBI_1.1.1
[13] lazyeval_0.2.2 colorspace_2.0-0 withr_2.4.1 tidyselect_1.1.0 gridExtra_2.3 curl_4.3
[19] compiler_4.2.1 VennDiagram_1.6.20 cli_2.3.0 rvest_0.3.6 formatR_1.7 xml2_1.3.2
[25] plotly_4.9.3 labeling_0.4.2 scales_1.1.1 digest_0.6.27 rmarkdown_2.20 pkgconfig_2.0.3
[31] htmltools_0.5.4 parallelly_1.23.0 dbplyr_2.1.0 fastmap_1.1.0 htmlwidgets_1.5.3 rlang_0.4.10
[37] readxl_1.3.1 rstudioapi_0.13 farver_2.0.3 generics_0.1.0 jsonlite_1.7.2 crosstalk_1.1.1
[43] magrittr_2.0.1 Matrix_1.4-1 futile.logger_1.4.3 fansi_0.4.2 Rcpp_1.0.6 munsell_0.5.0
[49] viridis_0.5.1 lifecycle_1.0.0 furrr_0.2.2 stringi_1.5.3 yaml_2.2.1 MASS_7.3-57
[55] grid_4.2.1 parallel_4.2.1 listenv_0.8.0 ggrepel_0.9.1 crayon_1.4.1 lattice_0.20-45
[61] splines_4.2.1 haven_2.3.1 cowplot_1.1.1 hms_1.0.0 knitr_1.31 pillar_1.4.7
[67] codetools_0.2-18 PythonEmbedInR_0.12.79 futile.options_1.0.1 reprex_1.0.0 glue_1.4.2 gprofiler2_0.2.0
[73] evaluate_0.14 lambda.r_1.2.4 data.table_1.13.6 renv_0.16.0-65 BiocManager_1.30.10 modelr_0.1.8
[79] vctrs_0.3.6 tweenr_1.0.1 cellranger_1.1.0 gtable_0.3.0 polyclip_1.10-0 future_1.21.0
[85] assertthat_0.2.1 pack_0.1-1 xfun_0.37 ggforce_0.3.2 broom_0.7.4 viridisLite_0.3.0
[91] globals_0.14.0 ellipsis_0.3.1 here_1.0.1